{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "seven-remainder",
   "metadata": {},
   "source": [
    "# Demo: generating Figure 7 with I-SAT\n",
    "# I-SAT demo using the ICESat-2 orbit dataset and Sentinel-2 metadata\n",
    "# I-SAT performs the spatial join and the temporal filtering as independent steps\n",
    "## This demo can be used to find the spatio-temporal intersection between the two datasets and the potential area of interest at hour-level granularity"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "impossible-tribe",
   "metadata": {},
   "source": [
    "## Please input the date range for the query; this demo includes the Sentinel-2 dataset for 2020"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ready-facial",
   "metadata": {},
   "outputs": [],
   "source": [
    "### test first spatial join (consider hour delay) then temporal join "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "angry-environment",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dates use the format \"yyyy-MM-dd\"; results include both the start date and the end date\n",
    "\n",
    "# The hour delay variable represents how many hours apart the two datasets are considered to be temporally intersected\n",
    "# E.g. if hour_delay = 6, for 2020-06-26 12:00:00, timestamps between 2020-06-26 06:00:00 and 2020-06-26 18:00:00 are considered to be temporally intersected\n",
    "start_date = '2020-01-01'\n",
    "end_date = '2020-03-31'\n",
    "duration_month = 3 # for export\n",
    "\n",
    "hour_delay = 6"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "certain-survivor",
   "metadata": {},
   "source": [
    "## Please input the potential area of interest; this demo provides \"Beaufort_Sea\" and \"Wandel_Sea\"\n",
    "## If you are not using an AOI, this can be left blank"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "romantic-white",
   "metadata": {},
   "outputs": [],
   "source": [
    "Paoi = 'Wandel_Sea'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "organizational-default",
   "metadata": {},
   "source": [
    "## First, setup the Spark and Sedona environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "still-album",
   "metadata": {},
   "outputs": [],
   "source": [
    "import findspark\n",
    "#findspark.init() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "desperate-suggestion",
   "metadata": {},
   "outputs": [],
   "source": [
    "SPARK_HOME='/opt/cloudera/parcels/CDH/lib/spark'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "liquid-print",
   "metadata": {},
   "outputs": [],
   "source": [
    "findspark.init(SPARK_HOME)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "comparable-easter",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/zhangp/.conda/envs/py37_environment/lib/python3.7/site-packages/geopandas/_compat.py:58: UserWarning: The installed version of PyGEOS is too old (0.6 installed, 0.8 required), and thus GeoPandas will not use PyGEOS.\n",
      "  UserWarning,\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import os\n",
    "import codecs\n",
    "import subprocess\n",
    "#from hdfs import InsecureClient\n",
    "import numpy as np\n",
    "#from pyspark import SparkContext\n",
    "from pyspark import SQLContext\n",
    "from pyspark.sql import Row\n",
    "from pyspark.sql import functions as F\n",
    "from pyspark.sql.types import *\n",
    "import rtree\n",
    "from pyspark.sql import Window\n",
    "#import igraph\n",
    "#from igraph import Graph\n",
    "import geofeather"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "preceding-success",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "from pyspark import StorageLevel\n",
    "import geopandas as gpd\n",
    "import pandas as pd\n",
    "from pyspark.sql.types import StructType\n",
    "from pyspark.sql.types import StructField\n",
    "from pyspark.sql.types import StringType\n",
    "from pyspark.sql.types import LongType\n",
    "from shapely.geometry import Point\n",
    "from shapely.geometry import Polygon\n",
    "\n",
    "from sedona.register import SedonaRegistrator\n",
    "from sedona.core.SpatialRDD import SpatialRDD\n",
    "from sedona.core.SpatialRDD import PointRDD\n",
    "from sedona.core.SpatialRDD import PolygonRDD\n",
    "from sedona.core.SpatialRDD import LineStringRDD\n",
    "from sedona.core.enums import FileDataSplitter\n",
    "from sedona.utils.adapter import Adapter\n",
    "from sedona.core.spatialOperator import KNNQuery\n",
    "from sedona.core.spatialOperator import JoinQuery\n",
    "from sedona.core.spatialOperator import JoinQueryRaw\n",
    "from sedona.core.spatialOperator import RangeQuery\n",
    "from sedona.core.spatialOperator import RangeQueryRaw\n",
    "from sedona.core.formatMapper.shapefileParser import ShapefileReader\n",
    "from sedona.core.formatMapper import WkbReader\n",
    "from sedona.core.formatMapper import WktReader\n",
    "from sedona.core.formatMapper import GeoJsonReader\n",
    "from sedona.sql.types import GeometryType\n",
    "from sedona.core.enums import GridType\n",
    "from sedona.core.SpatialRDD import RectangleRDD\n",
    "from sedona.core.enums import IndexType\n",
    "from sedona.core.geom.envelope import Envelope\n",
    "from sedona.utils import SedonaKryoRegistrator, KryoSerializer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "environmental-discharge",
   "metadata": {},
   "outputs": [],
   "source": [
    "os.environ['PYSPARK_PYTHON'] = \"./environment/bin/python\"\n",
    "os.environ['YARN_CONF_DIR'] = \"/opt/cloudera/parcels/CDH/lib/spark/conf/yarn-conf\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "dietary-antibody",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build (or reuse) the Spark session on YARN, with Sedona's Kryo serializer/registrator configured\n",
    "spark = (\n",
    "    SparkSession.builder\n",
    "    .appName(\"test_test_1\")\n",
    "    .master('yarn')\n",
    "    # Serialization settings taken from sedona.utils\n",
    "    .config(\"spark.serializer\", KryoSerializer.getName)\n",
    "    .config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName)\n",
    "    # Sedona / GeoTools jars shipped with the application\n",
    "    .config('spark.jars','sedona-core-2.4_2.11-1.0.0-incubating.jar,sedona-sql-2.4_2.11-1.0.0-incubating.jar,sedona-python-adapter-2.4_2.11-1.0.0-incubating.jar,sedona-viz-2.4_2.11-1.0.0-incubating.jar,geotools-wrapper-geotools-24.0.jar')\n",
    "    # Cluster resources\n",
    "    .config('spark.executor.memory', '20g')\n",
    "    .config('spark.driver.memory', '10g')\n",
    "    .config('spark.sql.shuffle.partitions', 6144)\n",
    "    .config('spark.executor.instances', '24')\n",
    "    .config('spark.executor.cores', '5')\n",
    "    .config('spark.rpc.message.maxSize', '1024')\n",
    "    # Packed Python environment distributed to the executors (see PYSPARK_PYTHON)\n",
    "    .config('spark.yarn.dist.archives', 'environment.tar.gz#environment')\n",
    "    .getOrCreate()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "intellectual-coating",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "SedonaRegistrator.registerAll(spark)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "expected-planner",
   "metadata": {},
   "source": [
    "## Second, read datasets"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "spatial-export",
   "metadata": {},
   "source": [
    "### Read the ICESat-2 orbits at hour level, 55935 orbits in total\n",
    "### Spark supports several input methods; here the ICESat-2 data is read from the Hadoop file system, because our Spark 2.4 deployment runs on Hadoop YARN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "following-rocket",
   "metadata": {},
   "outputs": [],
   "source": [
    "is2_df_raw = spark.read.option(\"header\",True).option('inferSchema', True).csv(\"ICE_Sat_hour_cycle_orbits_split\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "spiritual-medicare",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "79090"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "is2_df_raw.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "superb-diagram",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<function __main__.orbit_date(s)>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Extract the collection date of an orbit from its name; temporal granularity is one day for now\n",
    "from datetime import datetime\n",
    "def orbit_date(s):\n",
    "    \"\"\"Return the date embedded near the end of `s`, formatted as 'YYYY-MM-DD'.\n",
    "\n",
    "    The 11 characters that end 8 characters before the end of `s` are parsed\n",
    "    with the format '%d-%b-%Y' (for example '23-Dec-2019').\n",
    "\n",
    "    Raises ValueError (from strptime) if that slice does not match the format.\n",
    "    \"\"\"\n",
    "    # Import locally under a private alias: a later cell runs `import datetime`, which\n",
    "    # rebinds the notebook-level name `datetime` to the module and would otherwise make\n",
    "    # `datetime.strptime` fail for calls made after that cell.\n",
    "    from datetime import datetime as _datetime\n",
    "    date_str = s[(len(s) - 19):(len(s) - 8)]\n",
    "    return _datetime.strptime(date_str, '%d-%b-%Y').strftime(\"%Y-%m-%d\")\n",
    "\n",
    "# Register the function so it can be called from Spark SQL as orbit_date(...)\n",
    "spark.udf.register(\"orbit_date\", orbit_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "designed-freeware",
   "metadata": {},
   "outputs": [],
   "source": [
    "# is2_df_raw.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "incorporated-extra",
   "metadata": {},
   "outputs": [],
   "source": [
    "is2_df_raw = is2_df_raw.withColumn('date_date_type', (F.unix_timestamp(\"date_day\") + F.col(\"day_hour\") * 3600).cast('timestamp'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "monetary-forty",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+--------------+-------+---------+-------------------+--------+--------------------+--------------------+-------------------+\n",
      "|           date_day|      split_id|day_key|day_cycle|     day_exact_time|day_hour|         Description|     formed_line_WKT|     date_date_type|\n",
      "+-------------------+--------------+-------+---------+-------------------+--------+--------------------+--------------------+-------------------+\n",
      "|2019-12-23 00:00:00|            -1|   1330|        5|2019-12-23 00:00:08|       0|RGT 1330 23-Dec-2...|LINESTRING (106.7...|2019-12-23 00:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1330|        5|2019-12-23 01:00:08|       1|RGT 1330 23-Dec-2...|LINESTRING (-92.1...|2019-12-23 01:00:00|\n",
      "|2019-12-23 00:00:00|45603962748928|   1331|        5|2019-12-23 01:04:25|       1|RGT 1331 23-Dec-2...|LINESTRING (-93.8...|2019-12-23 01:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1331|        5|2019-12-23 01:28:25|       1|RGT 1331 23-Dec-2...|LINESTRING (127.8...|2019-12-23 01:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1331|        5|2019-12-23 02:00:25|       2|RGT 1331 23-Dec-2...|LINESTRING (70.78...|2019-12-23 02:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1332|        5|2019-12-23 02:38:43|       2|RGT 1332 23-Dec-2...|LINESTRING (-117....|2019-12-23 02:00:00|\n",
      "|2019-12-23 00:00:00|30537217474562|   1332|        5|2019-12-23 03:00:43|       3|RGT 1332 23-Dec-2...|LINESTRING (-141....|2019-12-23 03:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1332|        5|2019-12-23 03:02:43|       3|RGT 1332 23-Dec-2...|LINESTRING (103.6...|2019-12-23 03:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1332|        5|2019-12-23 04:00:43|       4|RGT 1332 23-Dec-2...|LINESTRING (-135....|2019-12-23 04:00:00|\n",
      "|2019-12-23 00:00:00|12360915877888|   1333|        5|2019-12-23 04:13:00|       4|RGT 1333 23-Dec-2...|LINESTRING (-141....|2019-12-23 04:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1333|        5|2019-12-23 04:36:00|       4|RGT 1333 23-Dec-2...|LINESTRING (168.0...|2019-12-23 04:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1333|        5|2019-12-23 05:00:00|       5|RGT 1333 23-Dec-2...|LINESTRING (27.29...|2019-12-23 05:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1334|        5|2019-12-23 05:47:17|       5|RGT 1334 23-Dec-2...|LINESTRING (-164....|2019-12-23 05:00:00|\n",
      "|2019-12-23 00:00:00|34024730918912|   1334|        5|2019-12-23 06:00:17|       6|RGT 1334 23-Dec-2...|LINESTRING (-170....|2019-12-23 06:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1334|        5|2019-12-23 06:08:17|       6|RGT 1334 23-Dec-2...|LINESTRING (178.4...|2019-12-23 06:00:00|\n",
      "|2019-12-23 00:00:00|17961553231872|   1334|        5|2019-12-23 07:00:17|       7|RGT 1334 23-Dec-2...|LINESTRING (-170....|2019-12-23 07:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1334|        5|2019-12-23 07:05:17|       7|RGT 1334 23-Dec-2...|LINESTRING (179.5...|2019-12-23 07:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1335|        5|2019-12-23 07:21:35|       7|RGT 1335 23-Dec-2...|LINESTRING (171.5...|2019-12-23 07:00:00|\n",
      "|2019-12-23 00:00:00|42245298323457|   1335|        5|2019-12-23 08:00:35|       8|RGT 1335 23-Dec-2...|LINESTRING (-16.8...|2019-12-23 08:00:00|\n",
      "|2019-12-23 00:00:00|            -1|   1335|        5|2019-12-23 08:33:35|       8|RGT 1335 23-Dec-2...|LINESTRING (175.0...|2019-12-23 08:00:00|\n",
      "+-------------------+--------------+-------+---------+-------------------+--------+--------------------+--------------------+-------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "is2_df_raw.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "joint-senator",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep orbits whose hour falls anywhere from start_date through end_date (both dates inclusive).\n",
    "# The bounds are cast explicitly so the comparison is timestamp-vs-timestamp and does not rely on\n",
    "# implicit coercion of a bare date string; the upper bound is 00:00 of the day after end_date.\n",
    "range_start = F.lit(start_date).cast('timestamp')\n",
    "range_end_exclusive = F.date_add(F.lit(end_date).cast('date'), 1).cast('timestamp')\n",
    "is2_df_raw = is2_df_raw.filter((F.col('date_date_type') >= range_start) & (F.col('date_date_type') < range_end_exclusive))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "complete-symposium",
   "metadata": {},
   "outputs": [],
   "source": [
    "# print(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "spectacular-jonathan",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4929"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "is2_df_raw.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "broadband-provincial",
   "metadata": {},
   "outputs": [],
   "source": [
    "is2_df_raw.createOrReplaceTempView(\"is2_df_raw\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "dietary-discussion",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract Spatial information from the WKT column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "saved-particle",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+-------+---------+-------------------+-------------------+--------+--------------------+------------+\n",
      "|               orbit|day_key|day_cycle|     date_date_type|     day_exact_time|day_hour|         Description|is_timestamp|\n",
      "+--------------------+-------+---------+-------------------+-------------------+--------+--------------------+------------+\n",
      "|LINESTRING (-124....|    935|        6|2020-02-26 00:00:00|2020-02-26 00:00:15|       0|RGT 935 26-Feb-20...|  1582693200|\n",
      "|LINESTRING (-124....|    936|        6|2020-02-26 00:00:00|2020-02-26 00:00:32|       0|RGT 936 26-Feb-20...|  1582693200|\n",
      "|LINESTRING (97.63...|    936|        6|2020-02-26 00:00:00|2020-02-26 00:24:32|       0|RGT 936 26-Feb-20...|  1582693200|\n",
      "|LINESTRING (38.44...|    936|        6|2020-02-26 01:00:00|2020-02-26 01:00:32|       1|RGT 936 26-Feb-20...|  1582696800|\n",
      "|LINESTRING (-147....|    937|        6|2020-02-26 01:00:00|2020-02-26 01:34:50|       1|RGT 937 26-Feb-20...|  1582696800|\n",
      "|LINESTRING (161.0...|    937|        6|2020-02-26 01:00:00|2020-02-26 01:57:50|       1|RGT 937 26-Feb-20...|  1582696800|\n",
      "|LINESTRING (37.46...|    937|        6|2020-02-26 02:00:00|2020-02-26 02:00:50|       2|RGT 937 26-Feb-20...|  1582700400|\n",
      "|LINESTRING (-168....|    937|        6|2020-02-26 03:00:00|2020-02-26 03:00:50|       3|RGT 937 26-Feb-20...|  1582704000|\n",
      "|LINESTRING (-171....|    938|        6|2020-02-26 03:00:00|2020-02-26 03:09:07|       3|RGT 938 26-Feb-20...|  1582704000|\n",
      "|LINESTRING (178.8...|    938|        6|2020-02-26 03:00:00|2020-02-26 03:27:07|       3|RGT 938 26-Feb-20...|  1582704000|\n",
      "+--------------------+-------+---------+-------------------+-------------------+--------+--------------------+------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# generate Linestring using ST_GeomFromWKT() only keep certain columns\n",
    "\n",
    "is2_df = spark.sql(\"select ST_GeomFromWKT(formed_line_WKT) as orbit, day_key, day_cycle, date_date_type, day_exact_time, day_hour, Description from is2_df_raw\")\n",
    "\n",
    "is2_df = is2_df.withColumn('is_timestamp', F.unix_timestamp(F.col('date_date_type')))\n",
    "is2_df.createOrReplaceTempView(\"is2_df\")\n",
    "\n",
    "## Show the first 10 rows of the table\n",
    "spark.table(\"is2_df\").show(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "ordinary-france",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the padded query window as Unix timestamps (seconds), interpreted in local time by time.mktime\n",
    "import time\n",
    "import datetime\n",
    "\n",
    "# Window opens at 00:00:00 on start_date, minus the allowed hour delay\n",
    "start_timestamp = time.mktime(datetime.datetime.strptime(start_date, \"%Y-%m-%d\").timetuple())\n",
    "start_timestamp = start_timestamp - hour_delay * 3600 # for potential time delay\n",
    "\n",
    "# end_date is inclusive, so the window closes at the last second of end_date (not at its\n",
    "# first second) before the hour delay is added; the filters using this bound compare with <=\n",
    "end_of_range = datetime.datetime.strptime(end_date, \"%Y-%m-%d\") + datetime.timedelta(days=1)\n",
    "end_timestamp = time.mktime(end_of_range.timetuple()) - 1\n",
    "end_timestamp = end_timestamp + hour_delay * 3600 # for potential time delay"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "infinite-offset",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+-------+---------+-------------------+-------------------+--------+--------------------+------------+\n",
      "|               orbit|day_key|day_cycle|     date_date_type|     day_exact_time|day_hour|         Description|is_timestamp|\n",
      "+--------------------+-------+---------+-------------------+-------------------+--------+--------------------+------------+\n",
      "|LINESTRING (-124....|    935|        6|2020-02-26 00:00:00|2020-02-26 00:00:15|       0|RGT 935 26-Feb-20...|  1582693200|\n",
      "|LINESTRING (-124....|    936|        6|2020-02-26 00:00:00|2020-02-26 00:00:32|       0|RGT 936 26-Feb-20...|  1582693200|\n",
      "|LINESTRING (97.63...|    936|        6|2020-02-26 00:00:00|2020-02-26 00:24:32|       0|RGT 936 26-Feb-20...|  1582693200|\n",
      "|LINESTRING (38.44...|    936|        6|2020-02-26 01:00:00|2020-02-26 01:00:32|       1|RGT 936 26-Feb-20...|  1582696800|\n",
      "|LINESTRING (-147....|    937|        6|2020-02-26 01:00:00|2020-02-26 01:34:50|       1|RGT 937 26-Feb-20...|  1582696800|\n",
      "|LINESTRING (161.0...|    937|        6|2020-02-26 01:00:00|2020-02-26 01:57:50|       1|RGT 937 26-Feb-20...|  1582696800|\n",
      "|LINESTRING (37.46...|    937|        6|2020-02-26 02:00:00|2020-02-26 02:00:50|       2|RGT 937 26-Feb-20...|  1582700400|\n",
      "|LINESTRING (-168....|    937|        6|2020-02-26 03:00:00|2020-02-26 03:00:50|       3|RGT 937 26-Feb-20...|  1582704000|\n",
      "|LINESTRING (-171....|    938|        6|2020-02-26 03:00:00|2020-02-26 03:09:07|       3|RGT 938 26-Feb-20...|  1582704000|\n",
      "|LINESTRING (178.8...|    938|        6|2020-02-26 03:00:00|2020-02-26 03:27:07|       3|RGT 938 26-Feb-20...|  1582704000|\n",
      "+--------------------+-------+---------+-------------------+-------------------+--------+--------------------+------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "is2_df = is2_df.filter(F.col('is_timestamp') <= end_timestamp).filter(F.col('is_timestamp') >= start_timestamp)\n",
    "is2_df.createOrReplaceTempView(\"is2_df\")\n",
    "\n",
    "## Show the first 10 rows of the table\n",
    "spark.table(\"is2_df\").show(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "overhead-shame",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "interracial-tourist",
   "metadata": {},
   "source": [
    "### Read the Sentinel data for the spatial join\n",
    "The file contains Sentinel-2 metadata collected over one year (2020), 2052387 records in total.\n",
    "meta_s2_2020_wkt.csv is a CSV file with `|` as the separator, and the footprint is in WKT format.\n",
    "The dataset is also read from the Hadoop file system.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "valued-throw",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "sentinel_df = spark.read.option(\"header\",True).options(delimiter='|').csv(\"meta_s2_2020_wkt.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "powerful-option",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- _c0: string (nullable = true)\n",
      " |-- s2_index: string (nullable = true)\n",
      " |-- timestamp: string (nullable = true)\n",
      " |-- wkt_geo: string (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sentinel_df.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "prerequisite-northern",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "stock-currency",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+--------------------+--------------------+--------------------+\n",
      "|_c0|            s2_index|           timestamp|             wkt_geo|\n",
      "+---+--------------------+--------------------+--------------------+\n",
      "|  0|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((166.768...|\n",
      "|  1|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((167.185...|\n",
      "|  2|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((167.144...|\n",
      "|  3|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((169.663...|\n",
      "|  4|20191231T235959_2...|2020-01-01T00:00:...|POLYGON ((167.573...|\n",
      "|  5|20191231T235959_2...|2020-01-01T00:00:...|POLYGON ((168.518...|\n",
      "|  6|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((169.588...|\n",
      "|  7|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((169.555...|\n",
      "|  8|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((169.641...|\n",
      "|  9|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((169.471...|\n",
      "| 10|20191231T235959_2...|2020-01-01T00:00:...|POLYGON ((169.437...|\n",
      "| 11|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((170.575...|\n",
      "| 12|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((169.494...|\n",
      "| 13|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((171.457...|\n",
      "| 14|20191231T235959_2...|2020-01-01T00:00:...|POLYGON ((171.989...|\n",
      "| 15|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((170.999...|\n",
      "| 16|20191231T235959_2...|2020-01-01T00:00:...|POLYGON ((169.559...|\n",
      "| 17|20191231T235959_2...|2020-01-01T00:00:...|POLYGON ((169.524...|\n",
      "| 18|20191231T235959_2...|2020-01-01T00:00:...|POLYGON ((168.494...|\n",
      "| 19|20191231T235959_2...|2020-01-01T00:00:...|POLYGON ((169.425...|\n",
      "+---+--------------------+--------------------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sentinel_df.show(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "rapid-synthesis",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build 'yyyy-MM-dd HH:mm:ss' from characters 1-10 (date) and 12-19 (time) of the raw\n",
    "# `timestamp` string, then parse it. Native column functions replace the Python lambda UDF,\n",
    "# which raised on a null `timestamp` (slicing None) and processed every row in Python.\n",
    "sentinel_df = sentinel_df.withColumn(\n",
    "    'date_date_type',\n",
    "    F.to_timestamp(\n",
    "        F.concat_ws(' ', F.substring('timestamp', 1, 10), F.substring('timestamp', 12, 8)),\n",
    "        'yyyy-MM-dd HH:mm:ss'\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "freelance-musician",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+--------------------+--------------------+--------------------+-------------------+\n",
      "|_c0|            s2_index|           timestamp|             wkt_geo|     date_date_type|\n",
      "+---+--------------------+--------------------+--------------------+-------------------+\n",
      "|  0|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((166.768...|2020-01-01 00:01:29|\n",
      "|  1|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((167.185...|2020-01-01 00:01:20|\n",
      "|  2|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((167.144...|2020-01-01 00:01:05|\n",
      "|  3|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((169.663...|2020-01-01 00:01:27|\n",
      "|  4|20191231T235959_2...|2020-01-01T00:00:...|POLYGON ((167.573...|2020-01-01 00:00:51|\n",
      "|  5|20191231T235959_2...|2020-01-01T00:00:...|POLYGON ((168.518...|2020-01-01 00:00:38|\n",
      "|  6|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((169.588...|2020-01-01 00:01:17|\n",
      "|  7|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((169.555...|2020-01-01 00:01:02|\n",
      "|  8|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((169.641...|2020-01-01 00:01:27|\n",
      "|  9|20191231T235959_2...|2020-01-01T00:01:...|POLYGON ((169.471...|2020-01-01 00:01:12|\n",
      "+---+--------------------+--------------------+--------------------+-------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sentinel_df.show(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "chemical-insert",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "governing-apache",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- _c0: string (nullable = true)\n",
      " |-- s2_index: string (nullable = true)\n",
      " |-- timestamp: string (nullable = true)\n",
      " |-- wkt_geo: string (nullable = true)\n",
      " |-- date_date_type: timestamp (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sentinel_df.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "affecting-helicopter",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2052387"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentinel_df.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "overall-forwarding",
   "metadata": {},
   "outputs": [],
   "source": [
    "# extract spatial information using WKT format footprint\n",
    "sentinel_df.createOrReplaceTempView(\"sentinel_df\")\n",
    "sentinel_sedona = spark.sql(\"select ST_GeomFromWKT(sentinel_df.wkt_geo) as geometry, sentinel_df.s2_index as s2_index, sentinel_df.date_date_type as date_date_type from sentinel_df\")\n",
    "sentinel_sedona.createOrReplaceTempView(\"sentinel_sedona\")\n",
    "sentinel_sedona = sentinel_sedona.withColumnRenamed('date_date_type', 'Sentinel_date_date_type')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "forty-speech",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------------------+-----------------------+\n",
      "|            geometry|            s2_index|Sentinel_date_date_type|\n",
      "+--------------------+--------------------+-----------------------+\n",
      "|POLYGON ((166.768...|20191231T235959_2...|    2020-01-01 00:01:29|\n",
      "|POLYGON ((167.185...|20191231T235959_2...|    2020-01-01 00:01:20|\n",
      "|POLYGON ((167.144...|20191231T235959_2...|    2020-01-01 00:01:05|\n",
      "|POLYGON ((169.663...|20191231T235959_2...|    2020-01-01 00:01:27|\n",
      "|POLYGON ((167.573...|20191231T235959_2...|    2020-01-01 00:00:51|\n",
      "|POLYGON ((168.518...|20191231T235959_2...|    2020-01-01 00:00:38|\n",
      "|POLYGON ((169.588...|20191231T235959_2...|    2020-01-01 00:01:17|\n",
      "|POLYGON ((169.555...|20191231T235959_2...|    2020-01-01 00:01:02|\n",
      "|POLYGON ((169.641...|20191231T235959_2...|    2020-01-01 00:01:27|\n",
      "|POLYGON ((169.471...|20191231T235959_2...|    2020-01-01 00:01:12|\n",
      "+--------------------+--------------------+-----------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Extract data within potential time slots\n",
    "\n",
    "sentinel_sedona = sentinel_sedona.filter(F.unix_timestamp(\"Sentinel_date_date_type\") <= end_timestamp).filter(F.unix_timestamp(\"Sentinel_date_date_type\") >= start_timestamp)\n",
    "sentinel_sedona.createOrReplaceTempView(\"sentinel_sedona\")\n",
    "\n",
    "sentinel_sedona.show(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cd6ca54d-a042-4323-aad9-c7631ef573b4",
   "metadata": {},
   "source": [
    "## Third, read the AOI data used to select coincident data within the AOI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76b39754-ee61-4088-9d47-8264d03de071",
   "metadata": {},
   "outputs": [],
   "source": [
    "# read and check potential interested areas using geopandas\n",
    "Beaufort_Sea_gpd = gpd.read_file('./ICESpark_Polygons/Beaufort_Sea_ICESpark.shp')\n",
    "Beaufort_Sea_gpd['id'] = 'Beaufort_Sea'\n",
    "Wandel_Sea_ICESpark_gpd = gpd.read_file('./ICESpark_Polygons/Wandel_Sea_ICESpark.shp')\n",
    "Wandel_Sea_ICESpark_gpd['id'] = 'Wandel_Sea'\n",
    "\n",
    "potential_interested_area_gpd = gpd.GeoDataFrame(pd.concat([Wandel_Sea_ICESpark_gpd, Beaufort_Sea_gpd], ignore_index=True) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "e95f5470-14d7-4abc-922d-b24c747ae96d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:>"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAACzCAYAAACQEkeRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAbXklEQVR4nO3de3Qc9ZXg8e9VS62X9X5YQg/LT8mS35IN+A3YGPOQDcZgwwQ8TGI4hizeJCSwZDiQ7E6YyR9kZ0nIMtlkyQQYlgkmkEBYAoHMkkBim6exDZhgwNhItgHbki1Z0t0/1E0UWY9uqaurqvt+ztFxd3VX1T0/t66q7+9Xv5+oKsYYY/wnxe0AjDHGjIwlcGOM8SlL4MYY41OWwI0xxqcsgRtjjE9ZAjfGGJ9KjefJiouLtaamJp6nNMYY39u2bdtBVS3pvz2uCbympoatW7fG85TGGON7IrJ3oO1WQjHGGJ+yBG6MMT5lCdwYY3zKErgxxvhUXDsxjTHGz7p7lMNtnbQe7aD1WEfvv+GfYx18q7mBguxg3OKxBG6MSWqqytGOrr9OxoMk6EPHOugZYgLXGRV5fGnxhLjFbgncGJOQTpzs5uCxoRNy+HFHV09Mznn/S3v5u4XjSUmRmBxvOJbAjTG+MVwJo/Xoic+fHznRFff43jvUzgt7DrJo8in33DjCErgxxlWxLGF4wc9e3GsJ3Bjjb26UMLzgNztbOPDZCcryMhw/lyVwY0zEvF7C8ILuHuXf/vQ+m5dNcfxcwyZwEakFHuqzaQJwm6p+L/T614DvAiWqetCJII0xzlFVjvUtYQyQmFuO+KeE4QX/9scPuOGsSaQGnL3VZtgErqq7gVkAIhIA9gFbQs+rgOXA+86FaIwZiY6ubg4e6xygtnzilGR94mTilDC84MCRE/xmZwvnTStz9DzRllDOAfaoanhmrLuArwO/iGlUxpgB9fQoh9s7h+3saz3awWfHT7odblK7/6W9nkvg64AHAUSkGdinqq+KDD7mUUQ2AhsBqqurRximMYkrkhJG+PGhtk66rYbhC//x9kH+fLCN8cXZjp0j4gQuIkGgGbhFRLKAW4Fzh9tPVe8F7gVoamqyT55JGlbCMA/+8X3+y/lTHTt+NFfgK4HtqvqxiEwHxgPhq+9KYLuIzFPVAw7EaYwnWAnDROPhrR/wleVTyEgLOHL8aBL4ekLlE1V9HSgNvyAi7wFNNgrF+JGVMIxTPmk/yROv7+eSOZWOHD+iBB4qmSwHrnUkCmMc0NnVM+SNJC1HT1gJwzjuZy/udTeBq2o7UDTE6zWxCsiYofT0KJ+0d556ldw/QR/r4NN2K2EY921//1Pe/OgI9aflxvzYdiemcZ2q0tbZ3S8hnzglIbce7eDgMSthGP958d1DlsCNvwxXwuj7/PjJbrfDNcYxy+vHOnJcS+AmKlbCMCY6jeMKqCrMcuTYlsCNlTCMcdCqWac5dmxL4AnMShjGuCuQIlwwvdyx41sC95meHuXT4ycHvquv38xxVsIwxl2LJhdTNCbdseNbAveItvCNJMPUlg8e66DLShjG+MLqWRWOHt8SuIM6u3o41DZ8Z1/r0Q7aO62EYUwiyUwLODb6JMwSeJQiKWGEH39iJQxjktby+rFkpzubYi2Bh1gJwxgTS06OPglL6ARuJQxjjBsKstJYPMX5lel9l8CthGGM8boLZpST5vB6mOCjBH7nk7t49OV9VsIwxnjeKodHn4Q5/yciRs6uK+XAkROWvI0xnlaRn0ljdUFczuWbBD5vfCFza+LTKMYYM1LNs04jJWXwdYJjyTcJHGDT0kluh2CMMUNy+uadvoatgYtILfBQn00TgNvoXeBhFdADtAAbVPUjJ4IMW1pbwtTyXHbuP+LkaYwxZkTqynKoLcuJ2/mGvQJX1d2qOktVZwGNQDuwBfiuqs4Ibf8lvUndUSLCpqUTnT6NMcaMSLw6L8OiLaGcA+xR1b2q
2vcyOBuIS+/i+dPLqSlyZm5dY4wZjYtmOjfz4ECiTeDrCK1MDyAi/01EPgCuZJArcBHZKCJbRWRra2vryCMNCaQI1y2xq3BjjLfMqymksiC+F5cRJ3ARCQLNwMPhbap6q6pWAfcDNwy0n6req6pNqtpUUhKbO5MunlNBWW5GTI5ljDGx0ByHW+f7i+YKfCWwXVU/HuC1B4A1sQlpeOmpAb64aHy8TmeMMUNKdXjhhsFEk8DX89flk8l9XmsGdsUqqIiCmVdNflZaPE9pjDEDWjKlhILsYNzPG1ECF5EsYDnwSJ/Nd4rIGyLyGnAucKMD8Q0qOz2Vv51vV+HGGPetmh3f0SdhEc2Foqrt9I777rstbiWTwVw9fxz3/m4PbTaToDHGJVnBAMunOrtww2B8dSdmf/lZQa48Y5zbYRhjktiKhjIygwFXzu3rBA5wzYLxxGnaAWOMOUU8Fm4YjO8TeFleRlwmTjfGmP6KsoMsnFTs2vl9n8AB1jZWuR2CMSYJXTijnNQ4LNwwmIRI4MvqS21IoTEm7prjPPdJfwmRwNNTA3GdwtEYY6oKM5lTne9qDAmRwAEubax0OwRjTBJZNbMCEXdHUCRMAp9WkUd9ea7bYRhjksTq2e6NPglLmAQOsLbJrsKNMc5rHFfApNL4LdwwmIRK4KtmVZAWsEHhxhhnXTGv2u0QgARL4IXZQZbXu3NLqzEmOeRmpHLBjPjPPDiQhErgYGPCjTHOumROJRlp7tw631/CJfBFk4spzUl3OwxjTIK68nRvlE8gARN4aiCFS+ZYZ6YxJvbm1hQweaz7nZdhCZfAwUajGGOcsd4jnZdhCZnAJ5aMoXFcgdthGGMSSF5mGue7sGzaUIZN4CJSKyKv9Pk5IiKbReS7IrJLRF4TkS0ikh+HeCO21u7MNMbE0BoPdV6GDZvAVXW3qs5S1VlAI9AObAGeBqap6gzgLeAWJwON1gUzysn0WGMbY/zritO9N8It2hLKOcAeVd2rqv9XVbtC218EPHXJm5ORxsrpZW6HYYxJAPNqCj1x52V/0SbwdfRZmb6Pa4AnRx9ObNmYcGNMLFzhoaGDfUWcwEUkCDQDD/fbfivQBdw/yH4bRWSriGxtbW0dTaxRO318IdWFWXE9pzEmsRRkpXHeNG9+m4/mCnwlsF1VPw5vEJGrgQuBK1VVB9pJVe9V1SZVbSopie/SZykpYtPMGmNGxYudl2HRJPD19CmfiMh5wDeAZlVtj3VgsbKmsRKXp+w1xvjYOo+N/e4rogQuIlnAcuCRPpvvBnKAp0PDC3/oQHyjVpGfyYKJ7i06aozxr9PHFzKpdIzbYQwqNZI3ha6wi/ptm+RIRA5Y21TJ/3vnoNthGGN8xqudl2EJeSdmfysaysjJiOhvlTHGAN7uvAxLigSekRageab7yx8ZY/zj0sZK0lO92XkZlhQJHOCyJhsTboyJnNcmrhpI0iTwGZV5TBnr3c4IY4x3nDmhiAkl3s8XSZPARcTuzDTGRMTrnZdhSZPAAVbPriA1xQaFG2MGV5gd5NwGf6ytm1QJvCQnnbPqSt0OwxjjYWt90HkZllQJHNybJ3zVrNMoz8tw5dzGmMh5+c7L/pJucPRZdaUUjwly8FhnXM53xoRCvnlBPdMq8mg5coK/u28rr+/7LC7nNsZEZ/7EIsYXZ7sdRsSS7go8LZDCxbMrHD/PhJJsfnRVEw9+6QymVeQBUJqbwUPXnsEKn9TXjEk2fum8DEu6BA6w1uEx4ZNLx/DEf1rEsvqxSL+ZtLKCqdxzZSPXLpngaAzGmOgUjwlybr2377zsLykT+JSxOcyszHPk2GkB4a7LZw05/WRKinDLyqn845rpNirGGI+4tLGKYKq/UqK/oo0hp67CNy+b8nnJZDiXz63m0esXeHq2M2OSxbq5/rtPJGkT+EUzTyM9xn9tG8cVcN2SiVHtM60ij19+eSEb5tfENBZjTOQWTiqmxkedl2FJm8DzMtNY0RC7eld2
MMBdl80iMIKSSEZagNubG7jvmnmU5qTHLCZjTGT81nkZlrQJHGI7wdXfX1hPddHo1t9cMqWEpzYv5rwY/mExxgyteEw6y+v9OTIsqRP4/IlFVORnjvo4y6aWcnmM6mcF2UHu+Zs5fPfSGYxJT7ph+sbE3dqmStIC/kyFw0YtIrWhJdPCP0dEZLOIrBWRHSLSIyJN8Qg21lJShDVzRjcmvCg7yHcumXHKcMHREBHWNlXx5I2LaBpXELPjGmNOtX6uP8snEEECV9XdqjpLVWcBjUA7sAV4A7gE+J2jETrs0lHOUPidS6ZT4lDduqowi4euPZObVtTacENjHLBocvGoS59uivY7+jnAHlXdG94QyytPN1QXZXHGhEJefPdwVPstnFTMzSvrIh4yOFKBFOH6syaxeHIJmx96mT2tbZ+/lp6aQlF2kILsIIXZQQqyev8tDG/LClKQnca7rW3c8fgOTnaro7Ea4zdX+rTzMizaBL4OeDCaHURkI7ARoLram421trEq4gReX57LzSvrWDylxOGo/tr0yjx++eVFvN1ylMLsIEXZ6WQGI5sxbf7EYmrLcrj2X7dxuC0+c8AY43UlOemcM9WfnZdhEVfuRSQINAMPR3MCVb1XVZtUtamkJL5JL1Irp5cN22FYWZDJ9y6fxS+/vDDuyTssMxhgRmU+lQVZESfvsLk1hTy6aQGT7aYhYwC4zMedl2HRXIGvBLar6sdOBeOWrGAq910zl7aObjKDATLTAmSkBT5/3Ps8JSHKRT/fNJ8bHniZ373V6nY4xrhGBNb5uPMyLJoEvp4oyyd+0jiu0O0Q4iI3I40fX93Ef/3VTv73799zOxxjXLF4cglVhf7tvAyL6PuDiGQBy4FH+my7WEQ+BM4EfiUiTzkToom11EAKtzc38O1VDSO6c9QYv/PDivORiOgKXFXbgaJ+27bQO5zQ+NQXzqxhXFE21z+wnaMnutwOx5i4KM1J55ypibG0or8r+GbUFk8pYcum+VRH8HVybG466+ZW8T+/0Mifbl3GbRfWU5Zry8QZf7l8bpXvOy/DRDV+Y4Obmpp069atcTufidzhtk6u+9dt/PG9vwynFIHZVfmcXVfKWXWl1JfnntKR29HVzZbt+7jn+T3sPdQe77CNiVhmWoAbl03mmgXjfTfvt4hsU9VT7ni3BG4+19HVzR2Pv8mxE12cVVfCkimlFGYHI9q3q7uHJ944wA9++w67Dhx1OFJjorOiYSy3XdQQk7mP3GAJ3MRFT4/y7K4W7v7tO7zywaduh2OSXHVhFnc0N3BWnb9r3oMlcJvuzsRUSoqwrH4s50wt5Q97DvH9597hhXcOuR2WSTLBQArXLZ3IpqUTh1ze0O8sgRtHiAjzJxUzf1IxL7//CT94bg9Pv5lw94AZD1o0uZhvrZrGeB+usBMtS+DGcbOrC/iXq5rYdeAI9zy3h8df/Ygem1fLxFhZbga3XVTPymllvr9rOlJWAzdxt/dQGz98/l1+vu1DOrt73A7H+FwgRbhmQQ03LpuSsIugWCem8ZwDn53gX/7jXR546X2On+x2OxzjQ3NrCvj26mnUleW6HYqjLIEbzzrc1sn/2foBz+z8mG17P7HyihlWUXaQW86fypo5FUlRLrEEbnzhk7ZOnn+rlWd3tfDc7haOOHyLf4pAbVkuO/cfcfQ8JjZEehdhuOncOvKy0twOJ25sGKHxhYLsIKtnV7B6dgVd3T1s2/sJz+5u4dmdLbzdciwm5xDpnR/9ohnlrJhWRmlOBi+9e4jbfrGD3R/bTUheNaMyj2+vmsbMqny3Q/EMuwI3vvHB4Xae3dXCM7taeHHPoag7QJvGFXDhjHJWTi9n7ABzuJzs7uGnf9jLXU+/xbEOm9zLK3IzUrnpvDqumFedtLNnWgnFJJS2ji5eeOcgv93dwjM7W2g52jHg+2ZX53PB9HIumFFOeV5kt1G3HDnBd57cxZaX98UyZDMCa+ZUcsv5dRSPcWbhcL+wBG4S
lqqy46Mjn1+dqyoXzijn/OnlVBaMfNJ+K6u4Z8rYMXx71TROn1A0/JuTgCVwY0bgZHcP9/3+Pb73m7etrBIHWcEAm5dN5m8XjE+YKV9jYbAEPmwLiUitiLzS5+eIiGwWkUIReVpE3g79W+BM6Ma4Jy2QwhcXTeDZry5h9azT3A4noZ0/vYxnvrqEjYsnWvKOUFRX4CISAPYBpwPXA4dV9U4RuRkoUNVvDLW/XYEbv3vhnYN87eFX2f/ZCbdDSRg1RVncsWoaS6aUuB2KZ434Cryfc4A9qroXWAXcF9p+H7B6VBEa4wMLJhXz6xsXc9FMuxofrWBqCv952RR+vXmxJe8RinYc+Dr+sjL9WFXdD6Cq+0XE3xPuGhOhvKw0/sf62SybWso3H33D1hMdgaW1JdzR3MC4osSfMdBJESdwEQkCzcAt0ZxARDYCGwGqqxNjJWhjAFbNqqCpppCvPPQKL/358PA7GE7Ly+C2ixpY0TA2KW6Bd1o0JZSVwHZVDU/q/LGIlAOE/m0ZaCdVvVdVm1S1qaTEviaZxFKRn8kDXzqDW1bWkRawhDSUDfNrePorSzgviaZ7dVo0CXw9fymfADwGXB16fDXwi1gFZYyfBFKEa5dM5NHrFzBl7Bi3w/GcwuwgP97QxO3NDWQn6HSvbokogYtIFrAceKTP5juB5SLydui1O2MfnjH+0XBaHo/dsJBrFox3OxTPWDipmF/fuIiz68a6HUpCiujPoaq2A0X9th2id1SKMSYkIy3AbRfVc3ZdKQ/+6X2Od3bT3tnF8c5u2jq7P3/e3tlNR5f3FrOoyM+ktiyHurIc6spzmTJ2DO+0HOOxVz7iud2tEc8/k5oi3LSili8tmkBKks5fEg92J6YxLunuUY6f7JPgO7o5frKLDz85zuaHXsHJX83sYIDashxqy3KZWp5DXVkutWU55GUOPkXrZ8dP8tSOAzz2ykf8fs/BQedtrynK4p/Xz2ZGZb4zwSchm07WGI8JpAhj0lNPWQascRy89OfDPPDS+46c94Evns4ZE4qivjLOy0zjsqYqLmuqouXoCZ54bT+PvfoR29//9PP3XNpYye3NDQm7tJnXWCsb40E3nVvLE6/v59P2kzE97ryaQuZPKh71cUpzMtiwYDwbFozng8PtPP7aR1QWZNFsNzjFlU04YIwHFWQH+eq5tTE/7jULa2J+zKrCLDYtnWTJ2wWWwI3xqCvmVVNfHrvFeivyM1leXxaz4xn3WQI3xqMCKcK3VjXE7HhXzx+XtCvaJCpL4MZ4WFNNIRfPrhj1cbKCAS5vsqksEo0lcGM87paVdWQHA6M6xpo5lUm1inuysARujMeV5mZw47LJozrGhgU1sQnGeIolcGN8YMP88Zw/fWQdkEtrS5hYYnO0JCJL4Mb4QDA1he9fMYevn1dLtBP52dwsicsSuDE+ISJsWjqJn2yYS25GZPfgTSodw6LJo79xx3iTJXBjfGZpbSmP3bAwoqlrN8yvsbm3E5glcGN8qKY4my2bFgxZF8/LTOOSOaMfgmi8yxK4MT6VnZ7K96+Yw00rBq6Lr5tXRVbQpjtKZJbAjfExEeH6sybx43518UCKcNWZNe4FZuLCErgxCeCsfnXx8xrKqMjPdDkq47RIl1TLF5F/F5FdIrJTRM4UkZki8gcReV1EHheR2M26Y4yJWk1xNo9sWsDKaWWOzDpovCfSK/D/DvxaVeuAmcBO4EfAzao6HdgC3ORMiMaYSI1JT+UHV85hTnWB26GYOBg2gYeurBcD/wtAVTtV9VOgFvhd6G1PA2scitEYEwURsaGDSSKSK/AJQCvwExF5WUR+JCLZwBtAc+g9a4GqgXYWkY0islVEtra2tsYkaGOMMZEl8FRgDnCPqs4G2oCbgWuA60VkG5ADdA60s6req6pNqtpUUlISo7CNMcZEksA/BD5U1ZdCz/8dmKOqu1T1XFVtBB4E9jgVpDHGmFMNm8BV9QDwgYiEF+g7B3hTREoBRCQF
+CbwQ8eiNMYYc4pIR6F8GbhfRF4DZgH/AKwXkbeAXcBHwE8cidAYY8yARFXjdzKRVmBv3E44tGLgoNtBDMHiGzkvxwYW32glY3zjVPWUTsS4JnAvEZGtqtrkdhyDsfhGzsuxgcU3WhbfX9it9MYY41OWwI0xxqeSOYHf63YAw7D4Rs7LsYHFN1oWX0jS1sCNMcbvkvkK3BhjfC3hE7iIrBWRHSLSIyJN/V6bEZoSd0doWtyM0PbG0PN3ROSfxcGZgQaLT0RqROS4iLwS+vlhn9eeE5HdfV4r9Vh8rrdfn9erReSYiHytzzbX22+Y+OLSfkP8387r0zavisjFfV5zve2Gic/1z56ILBeRbaE4tonI2X1ei237qWpC/wBT6Z058Tmgqc/2VOA1YGboeREQCD3+I3AmIMCTwEoX4qsB3hhkn796r0vtN1R8rrdfn9d/DjwMfM1L7TdMfHFpvyH+b7OA1NDjcqClz3PX226Y+Fz/7AGzgdNCj6cB+5z67CX8gnmquhMYaHrNc4HXVPXV0PsOhd5XDuSq6h9Cz38KrKb3wxDP+Dwh2vi81H4ishp4l94J2FwRbXzxbL/BYlPV9j5PMwBXOsqijc8rnz1VfbnP0x1Ahoikq2pHrGNI+BLKEKYAKiJPich2Efl6aHsFvRN4hX0Y2uaG8dI7he/zIrKo32s/CX0F+3snvyYOY6D4PNF+0jvl8TeAOwZ5i6vtN0R8Xmm/00VkB/A6cJ2qdvV52fXP3iDxeaLt+lkDvNwveces/RLiClxEfgOUDfDSrar6i0F2SwUWAnOBduAZ6Z0a98gA7x3VFcgI49sPVKvqIRFpBB4VkQZVPQJcqar7RCSH3q/gXwB+6oX46P3q2p8b7XcHcJeqHhvgd8QL7TdYfDFtvxHGhvbOPtogIlOB+0TkSVU9gTfabsD48M5nL7xvA/CP9H7bD4tp+yVEAlfVZSPY7UPgeVU9CCAiT9A77/nPgMo+76ukd7KuuMYX+ovdEXq8TUT20PutYauq7gttPyoiDwDzGMWHIMbxfYgH2g84HbhURP4JyAd6ROSEqt7thfYbLD56f6lj1n4jjK3v/jtFpI3eWq4nPntDxOeVzx4iUknvUpNXqernU23Huv2SuYTyFDBDRLJEJBVYArypqvuBoyJyRujrzVXAkH9pnSAiJSISCD2eAEwG3hWRVBEpDm1PAy6kd3UkT8TnlfZT1UWqWqOqNcD3gH9Q1bu90n6DxeeF9hOR8aHfCURkHL0dde95pe0Gi88LbReKKR/4FXCLqr7QZ3vs28+pHlqv/AAX0/uXuQP4GHiqz2t/Q28nwxvAP/XZ3hTatge4m9ANT/GMj97a2Q7gVWA7cFFoezawjd4RNDvoXXA64JX4vNJ+/d5zO6FRHl5pv8Hii2f7DfF/+4VQ27wS+r9d7aW2Gyw+r3z26F0foS0UX/in1In2szsxjTHGp5K5hGKMMb5mCdwYY3zKErgxxviUJXBjjPEpS+DGGONTlsCNMcanLIEbY4xPWQI3xhif+v8N++gDOpSa+gAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "potential_interested_area_gpd[potential_interested_area_gpd['id'] == 'Beaufort_Sea'].plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "a67f8732-db5d-4857-bfd9-78041c551720",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:>"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAASUAAAD4CAYAAACqsPAyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAdyUlEQVR4nO3deZScdZ3v8fe3qvdOV2XrTqe3JEAW0gmyNAETRBBFRIVxu6Ko4DIZFGSuy51RGcVRZ0av3qPO1Tsjo849My4cVBBkDej1aDwqBCUQ1mTCkgXICtnT2/f+UVWd6k51dyVd9TxPPf15ncPpqqfrqfo9kHz4/X71e35fc3dERKIiEXYDRETyKZREJFIUSiISKQolEYkUhZKIREpV2A0oZObMmT537tywmyEiZfTggw/ucPfmkccjGUpz585lzZo1YTdDRMrIzJ4tdFzDNxGJFIWSiESKQklEIkWhJCKRolASkUhRKIlIpCiURCRSFEoiEimRXDwp4TnUN8Cu/b3s2t/L7gPZn9nnuw70snt/Hy8d7KV9aj2LZ6fobk+zqLWJprrqsJsuMaFQirH+gUFeOtjH7v297MyFy4FcyPSx+0De8WwIHegdOK7PmjOjIRNSbSkWt6VYPDvNrFQtZlbiq5K4UyhVCHdnz6H+EcGS34M5EjS54y8f7Ausfc/uPMCzOw9w17oXho7NaKzJBlRq6OcJzVNIJhRUMjqFUkjyh0kjh0o7hz3vGwqd/sHK2rp45/5efrt+B79dv2PoWF11goWtqWG9qkWtTTTU6I+iZOhPQgn0Dwyy+0Df0cGS34s50Meu/YczIbO/l4N9xzdMqnSH+gZZu+kl1m56aeiYGcyb2Uh3W3pYr6q5qTa8hkpoFEoj5IZJQz2YAsOlofA50MfOfYfZc6g/7GZXNHfYuH0/G7fv5xdrtw4db2mqHQqo7rY0i9tSzJneQELDv1iLfSgd7B0YCpWRk7q5nzv35Z738dKByhsmxdW2vYfZ9uR2fv3k9qFjDTVJTs4N/bK9qgWzmqirTobYUimlWIfSN+57im/ctz7sZkgJHegd4MFnd/Pgs7uHjiUTxknNU46aVJ/WWBNiS+V4xTqUXtxzKOwmSAAGBp0nX9zLky/u5ZY/bxk63pauywRUdq6quy1Fx7R6LVOIuKJCycw+BnwIcOAR4P3Ap4C/BHJ968+4+50Fzr0I+CaQBL7r7l8uQbuLsm3P4aA+SiJo68uH2PryIe57fNvQsaa6qmG9qcVtKea3NFFTpZsbomLcUDKzduBaYLG7HzSzm4DLsr/+urt/bYxzk8C3gdcBm4EHzOw2d39s4k0f37a9CiUZbu+hfv749C7++PSuoWPVSWN+S1PepHqKk9tSpLRKPRTFDt+qgHoz6wMagK3A3CLOWwZscPeNAGZ2I3ApEEgobVcoSRH6BpzHnt/DY8/vGXa8c3o93bPTw3pVs9N1Gv6V2bih5O5bzOxrwHPAQWCVu68ys+XANWb2PmAN8Al33z3i9HZgU97zzcBZpWn62AYHnR37FEpy/DbtOsimXQe5+9Ejq9SnNVQPC6nutjQnzGykKqnhX6kUM3ybRqZ3Mw94CfiJmb0H+Bfgi2Tmmb4I/C/gAyNPL/CWBb9vN7OVwEqArq6u4lo/hl36al/KYPeBPn63YSe/27Bz6FhNVYJFrU3Dliksak3RWBvr75HKpph/a68Fnnb37QBmdjOw3N1/kHuBmf0bcHuBczcDnXnPO8gM/Y7i7jcANwD09PRMOE00yS1B6e0f5OHNL/Pw5peHjpnBvBmNnDysV5WipakuxJZWhmJC6TngbDNrIDN8uwBYY2az3f357GveAqwrcO4DwHwzmwdsITNB/u6JN3t82zV0kxC5w8Yd+9m4Yz93PPz80PGZU2qHTagvbksxd0ajblLOU8yc0h/N7KfAn4B+4M9kejTfNbNTyQzHngH+CsDM2sh89X+xu/eb2TXAPWSW
BHzf3R8tx4WMtE1rlCSCduw7zG+e2s5vnjqySr2+Osmi2bnhX3roJuXJukq9qEGvu18PXD/i8HtHee1W4OK853cCR61fKjctB5BKcbBvgD8/9xJ/fu6loWMJgxPzVqnn7v2bPglWqcd2Jk7LAaSSDTqs37aP9dv2cetDR6ZhW1N1Q/NTubmqzmnxuklZoSRSQV7Yc4gX9hziV0/krVKvreLkkavUZ02htqoyh3+xDaVtezWnJJPD3sP93P/MLu5/5sgq9aqEcVLLlKG1VItnZwIr3RD9VeoxDiX1lGTy6h90nnhhL0+8sJeb/3TkJuWOafXD7v3rbk/TFrFV6rEMJXfXOiWRAjbvPsjm3QdZ9diLQ8fS9dXD1lItbktxYvMUqkNapR7LUNrfOzBpt5sVOVYvH+zj9xt38vuNeavUkwkWtE45cu9fW4qTZ6eYEsAq9ViGktYoiUxM78Ag67bsYd2W4Tcpz53RcNQyhZam0pbSimcoaT5JpCye2XmAZ3Ye4M5Hji6l9c3LTivJOqpY3tqsUBIJzs79vazesIPG2tIsQYhlKGmNkkiwZqfqSrYuKpahpDVKIsHqnN5QsveKZSht13IAkUB1KZTGpjklkWCppzQOzSmJBEs9pXFoTkkkWOopjaG3f5DdB/rCbobIpKKe0hi0Da5IsOqrk8ycUrrN5+IXSppPEglU1/SGkt5mUlQomdnHzOxRM1tnZj82s7q8333SzNzMZo5y7jNm9oiZPWRma0rV8NHovjeRYJVyPgmKCKW8st097r6ETAGAy7K/6yRTkvu5cd7mfHc/1d17JtjecWk5gEiwSjmfBMUP33Jlu6s4UrYb4OvA3zBKgckwKJREgtU1vb6k7zduKLn7FiBXtvt54OVs2e5LgC3uvna8twBWmdmD2Sq4BZnZSjNbY2Zrtm/fPtrLxqU5JZFgdc0IfviWX7a7DWg0s/cB1wGfK+IzVrj76cAbgKvN7NxCL3L3G9y9x917mpubi76AkbZrjZJIoMIYvg2V7Xb3PuBm4P1kQmqtmT1Dphz3n8ysdeTJ2TpwuPs24BZgWYnaXpCGbyLB6pgWfCgNle22zPd+FwA3u3uLu89197nAZuB0d38h/0QzazSzptxj4EIKl/cuGe3NLRKcWanaklfyLWZO6Y9Armz3I9lzbhjt9WbWZma5irizgNVmtha4H7jD3e+ecKtHMTjo7NDiSZHAlHroBhMr253/+7l5j4fKdrv7RuAVE2ti8XYf6KV/MDJfBIrEXqnXKEHMVnRrPkkkWOXoKSmUROS4KZTGoTVKIsHS8G0c2kdJJFjqKY1DywFEglNblaB5Sm3J3zdWoaThm0hwOqc3kEiUbsuSHIWSiByXcgzdIGahpDklkeAolIqgJQEiwSnHN28Qo1Dad7ifA70DYTdDZNJQT2kcmk8SCZZCaRzam1skWJ0l3nEyJz6hpJ6SSGBmTqmloaao+/mPmUJJRI5ZqfflzhebUNKckkhwyjWfBDEKJa1REgmOQqkI6imJBKdca5QgmAq5F5nZk2a2wcw+VaqGj6SbcUWCE2pPaSIVcs0sCXybTHmlxcC7zGxxaZo+3HbtzS0SmFLXestX7gq5y4AN7r7R3XuBG8nUkCup3v5Bdu3vLfXbikgBNckEs5rqxn/hcSp3hdx2YFPe883ZY0eZSIVcVTARCU7HtPqybFmSU+4KuYVaXrBXNZEKuVqjJBKcck5yQ/kr5G4GOvOed3Bk6Fcy+uZNJDjlnOSGMlfIBR4A5pvZPDOrITNBflsJ2w8olESCFHooTaRCrrv3A9cA9wCPAze5+6MlaPcwCiWR4JR7+FbWCrnZ53cCdxY4rWQ00S0SnNB7SpVAoSQSnHJtWZKjUBKRok1vrKGprrqsnxGTUNLCSZEglHs+CeISSproFglEueeTIAahdKhvgL2H+8NuhsikUM7N3XIqPpQ0nyQSHPWUiqD5JJHgaE6pCJpPEgmOekpF0PBNJBhVCWN2WnNK
41IoiQSjY1o9yTJuWZJTnsJNAZpsc0rTG2tY0p5mSVuKR7a8zG/X7wi7STJJBDGfBDEIpThvgzsrVcuStjTd7WmWtqdZ0p6iNVVHZrMGcHfufexFvnTH4zy360DIrZW4UygVKS4T3e1T6+luS2XDJ013e4qWcbYcNTMu7G7l3AXNfG/103zrVxs42DcQUItlsglikhtiEEo7K3Bv7jkzGrI9oEwIdbelmd5Yc9zvV1ed5OrzT+Ktp7fz5bue4NaHSr6PnohCqViHItwzMIN5MxszvZ9sCHW3pUnXl+eGxtnper552Wm89+w5fP4Xj7Juy56yfI5MTgqlIvUNDIbdBAASBvNbmuhuT7GkLc3SjjQnz04xpTb4f8U9c6dz69XncNOaTXz1nidV6UVKQnNKReobGK26U/lUJYwFs5qGJp+729Oc3JqiviYZeFtGk0wY71rWxcVLZvONXz7Ff/z+WQYGg/93JfGQrq8uWw9/pKJCycw+BnyITCWSR8gUDriOTJWTQWAbcGV218mR5z4D7AUGgH537ylJy7P6+svbU6qpSnBya9ORb8Da0ixonUJtVXQCaCzphmquf3M371rWxRd+8RirN2gJgRy7oIZuUEQo5VXIXezuB83sJjIFAL7q7p/NvuZaMuWWrhrlbc5397L8begt4fCtvjrJ4rYUS9oyvZ8lbWnmz5pCdbLi15iyYFYT//nBZax67EW+ePtjbN59MOwmSQWJVCjlva7ezPrIVsh19/xZ1EZGr5JbVsc7pzSltiobQGmWdmR+ntA8JZAVq2ExM17f3cqrFzTzb7/ZyP/59X9pCYEUJaj5JCgilNx9i5nlKuQeBFa5+yoAM/sH4H3Ay8D5o70FsMrMHPiOuxeshGJmK4GVAF1dXUU1fmDQKWaaJF1fzZLsBHR3djX03BmNZa3yGWV11Uk+esF83nZGB/901xP8Yq2WEMjYguwpmfvYf6uzFXJ/BrwTeAn4CfBTd/9B3ms+DdRlq56MPL/N3beaWQtwL/BRd//NWJ/Z09Pja9asGbfxh/oGWPTZu4cdy92GsTQbQkva03RMqx9aBS1Hu//pXVx/26M8/ryWEEhhP/jgWZwzf2ZJ39PMHiw0x1zM8G2oQm72jW4GlgM/yHvNj4A7KFCGKTf57e7bzOwWYBkwZigVa2DQuWBRy1DvZ2lHethtGFKcZfOmc/tHz+HGB57ja/c8ye4DfWE3SSImanNKQxVyyQzfLgDWmNl8d1+ffc0lwBMjTzSzRiDh7nuzjy8EvlCapkNjbRXfu/LMUr3dpJZMGJefNYc3Lp3NN+5bz3/+QUsIJCOZMGZPHfuWp1KaSIXcL5vZOjN7mEzY/DUMr5ALzAJWm9la4H7gDne/e+RnSHRMbajh85d0c+e1r2L5iTPCbo5EQNvUukC/gR53TikMxc4pSXm5O3eve4Ev3fE4W17SEoLJasVJM/jhh84u+fuONqdU+QtwpGzMjDcsnc0vP/FqPvbaBdRV64/LZBTkfBIolKQIddVJ/vq18/nlJ86juak27OZIwDqmKZQkotqn1vPBc+aF3QwJmHpKEmmXn9VFqq7i7+OWY6BQkkhrqqvmiuVzw26GBEihJJF35fK5mvSeJJpqq5jaEMyWJTn6kyXHbMaUWi47s7j7E6WydU5vCPwOCYWSHJe/PPcEqibpDc2TSdBDN1AoyXFqn1rPW05rD7sZUmZdMxRKUkGuOu9EdO9zvAW5j1KOQkmO24nNU7iouzXsZkgZafgmFecj550UdhOkjBRKUnGWdqR5VYk3/5JoMMvMHQZNoSQTpt5SPLWl66mpCj4iFEoyYWefMJ3TuqaG3Qwpsc7pwfeSQKEkJWBmXK3eUuyEMZ8ECiUpkdcsamHhrKawmyElFOlQMrOPmdmj2e1vf2xmdWb2RTN72MweMrNVZtY2yrkXmdmTZrbBzD5V2uZLVCQSxofPOzHsZkgJhbFGCYoIpbwKuT3uvgRIcqRC7inufipwO5kK
uSPPTQLfBt4ALAbeZWaLS9d8iZI3nTI7tHkIKb3IhlJWrkJuFcdWIXcZsMHdN7p7L3AjcOlEGizRVZVM8FfnqrcUF5Edvrn7FiBXIfd54OX8Crlmtgm4nAI9JaAd2JT3fHP2mMTU28/oYH7LlLCbIRPUUJNkRmNNKJ9dzPBtGpnezTygDWg0s/cAuPt17t4J/BC4ptDpBY4VLJ9iZivNbI2Zrdm+fXux7ZeIqatO8tMPL9eCygrXFcKWJTnFDN+GKuS6ex+Qq5Cb70fA2wqcuxnozHveARQsXO/uN7h7j7v3NDc3F9Esiap0fTXfv/JMLj9Ley5VqrDmk6C4UBqqkGuZ6LwAeNzM5ue9pmCFXOABYL6ZzTOzGjIT5LdNtNESfdXJBF/6iyVc/+bFaNulyhPWfBKUuUKuu/eTGdbdAzwO3OTuj5bjQiR6zIz3r5jHd6/oobEmGXZz5BiEGUqqkCuBeOKFPXzw/65Rpd0K8e9Xnsn5i1rK+hmqkCuhWtSa4udXr+DUzqlhN0WKEPU5JZGSaG6q5caVZ/OmU2aH3RQZR8e08BbBKpQkUHXVSf75stO49oL5479YQtGaqqOuOrw5QIWSBC6RMD7+ugV8452nUpPUH8GoCXOSGxRKEqK/OK2dH688K7SVw1JYmPNJoFCSkJ0xZzo/v3qFbk2JEPWUZNLrnN7Azz6ynFcv0Er+KOiaEe5ODwoliYRUXTXfu6KHK145J+ymTHrqKYlkVSUT/P2lS/j7S7p1a0qIOqcplESGuWL5XL5/5ZlMqa0KuymTTm1Vguam2lDboFCSSDpvYQs/+/By2tJ1YTdlUglzy5IchZJE1sLWJjpCnt+YbMKeTwKFkkTY/sP9/Pm53WE3Y1IJe40SKJQkwv749E76BqK3i0WcqackMobV63eG3YRJR6EkMobVG7RXe9C6ZiiURAp6cc8hnnpxX9jNmHTCXqMECiWJqN9t2BF2Eyad5qZa6iOwbfFEynZ/1cyeyJbuvsXMpo5y7jNm9ki2vLf2uJWirF6vUApaFOaTYGJlu+8Flrj7KcBTwKfHeJvz3f3UQvvxiozk7qxWTylwFRNKWYXKdq/KVisB+AOZmm4iE7Z+2z627T0cdjMmnSisUYIJlu3O8wHgrtHeAlhlZg+a2crRPkcVciXntxq6haJiekpjle3O/v46oJ9M6e5CVrj76cAbgKvN7NxCL1KFXMlpnxrufj6TVcWEEmOU7TazK4A3AZf7KAXk3H1r9uc24BZgWSkaLvF10ZJWLupuDbsZk04lhdJoZbsvAv4WuMTdDxQ60cwazawp95hMJd11pWm6xNkXLu0mVaetS4JSU5WgJeQtS3ImUrb7W0ATcG/26/5/heFlu4FZwGozWwvcD9zh7neX/jIkblpSdfzdmxaH3YxJo2NaPYmI7KxX1P+K3P164PoRh08a5bVbgYuzjzcCr5hIA2XyescZHdz20FYtDwhAVIZuoBXdEmFmxj+9dSn1IRZGnCwUSiJF6pzewCdfvzDsZsSeQknkGFy5fC6ndk4NuxmxFpWFk6BQkgqQTBj/8+2nUJ2MxkRsHKmnJHKMFsxq4urzC363IiWgnpLIcfjIeSexcFZT2M2InRmNNZEqZ6VQkopRU5XgK28/RYUqSyxKvSRQKEmFObVzKh9YMS/sZsRKlOaTQKEkFejjFy6gc7pu2i0VhZLIBDXUVPHlt54SdjNiQ6EkUgIrTprJO3s6w25GLGhOSaREPvPGkyNzZ3sli0JZpXwKJalY6fpqvnDpkrCbUdGqk0Zrqi7sZgyjUJKKdtGSVi5eqg3hjlfHtAaSEVtjoVCSivf5S7pJ11eH3YyKFLX5JFAoSQy0NNXxWW0Id1w6p0VvaYVCSWLhbae386r5M8NuRsWJ2nIACKZC7kVm9qSZbTCzT5W09SJZZsY/vmUpDREoO11JKjKUJlIh18ySwLfJlFdaDLzLzNTPlrLonN7A/9CGcMekkueUjrdC7jJgg7tv
dPde4EYyNeREyuJ9r5zL6V1Tw25GxYjaGiUof4XcdmBT3vPN2WNHUYVcKYVkwvjK206hJqnp0vFMbagmVRe9by3LXSG30AKI0YpWqkKulMT8WU1c8xptCDeeKM4nQfkr5G4G8m9Q6gC2TqzJIuO76tUnsqhVG8KNJYrzSVDmCrnAA8B8M5tnZjVkJshvK0XDRcZSU5XgK2/ThnBjqdie0kQq5GYnwq8B7gEeB25y90fLcSEiI72icyofPEcbwo0mqqFU1gq52ed3AncWeq1IuX38dQu559EXeW7XaJ350SUMEmaYZdZBJQwMGzp+qH+AvoGCU6QVoaJDSaRS1dckufXqFew73E8yYVQljMTIn5b7Z3gIjefFPYf47M/XseqxFwO4ktJTKImEZFpjDdMaa0r+vrNSdXznvWdw17oX+Nyt69ixr7fkn1EuyYQxOx2tLUtytJhDZALMjIuXzua+j7+at59RaP1wNLVPracqomu5otkqkQoztaGGr73jFfzHB5bREcE770eK6tANFEoiJXXugmbu+e/n8v4VcyliWio0UV2jBAolkZJrrK3i+jd389OrljO/ZUrYzSlIPSWRSeiMOdO4/dpzuPaC+VRFbBVnlOvmKZREyqi2KsnHX7eA2689h1d0pMNuzhD1lEQmuUWtKW7+yAr+7o0nU1cd/l87hZKIkEwYH3rVCdx2zTmhToI31VVFutCCQkkkYAtmNXHegvC25+ma3lDUivWwKJREQvDus+aE9tlRHrqBQkkkFOcvbA6tMq1CSUSOUpVM8N/O7Bz/hWUQ5YWToFASCc1lZ3aGsgmdekoiUlDb1HrOX9gS+OcqlERkVO8+qyvQz0tYJgyjbCIVct+RPTZoZj1jnPuMmT2S3TJ3TemaLlL5zlvYQluA+xrNTtdTUxXtvshEKuSuA94K/KaIzznf3U9191HDS2QySiaMd54ZXG8p6kM3KH7nyVyF3D6OVMh9HIrbNlRERvfOMzv551+tZ2CwNPt9N9QkaU3VMStVR2s6+zNVS2u6jpNaol92atxQcvctZparkHsQWFWgQu6YbwGsMjMHvuPuNxR6kZmtBFYCdHUFO84WCVNruo7XLGrh3nH2+jaDmVNq8wKndlj4tKbqmJWuo6m2qqI7C+OG0ogKuS8BPzGz97j7D4r8jBXuvtXMWsiUY3rC3Y8a8mXD6gaAnp6eyi0RIXIc3nv2HDZs28esVO1QuLSm6oY9bm6qpTqiW9iWUjHDt6EKuQBmlquQW1QoZUsu4e7bzOwWYBnFzUOJTBrnLmjm/33yvLCbEQnHXSG3mDc3s0Yza8o9Bi4kM0EuIlLQcVfINbO3mNlm4JXAHWZ2DwyvkAvMAlab2VrgfuAOd7+7DNchIjFh7tGbvunp6fE1a7SkSSTOzOzBQsuE4j9rJiIVRaEkIpGiUBKRSFEoiUikKJREJFIUSiISKZFcEmBm24Fnj/G0mcCOMjQnynTN8Rfn653j7keVdYlkKB0PM1sz2bZG0TXH32S7XtDwTUQiRqEkIpESp1AquE9TzOma42+yXW985pREJB7i1FMSkRhQKIlIpMQmlMzsk2bmZjYz79inzWyDmT1pZq8Ps32lYmZfNLOHsyWrVplZW97vYne9AGb2VTN7Invdt5jZ1LzfxfWaRy1hFtdrHuLuFf8P0AncQ2bB5czsscXAWqCWzP7i/wUkw25rCa41lff4WuBf43y92Wu7EKjKPv4K8JVJcM0nAwuBX5Mpb5Y7Httrzv0Tl57S14G/IVM5JedS4EZ3P+zuTwMbyOwPXtHcfU/e00aOXHMsrxfA3Ve5e3/26R+AjuzjOF/z4+7+ZIFfxfaacyo+lMzsEmCLu68d8at2YFPe883ZYxXPzP7BzDYBlwOfyx6O7fWO8AHgruzjyXLN+WJ/zcUWowyVmd0HtBb41XXAZ8h07486rcCxilj/MNb1uvut7n4dcJ2ZfRq4BrieCr5eGP+as6+5DugHfpg7rcDrY3XN
hU4rcKxirrkYFRFK7v7aQsfNbCmZcfXabPG9DuBPZraMzP9BOvNe3gFsLXNTS2K06y3gR8AdZEKpYq8Xxr9mM7sCeBNwgWcnV4j5NY+ioq+5GBU9fHP3R9y9xd3nuvtcMv/BTnf3F4DbgMvMrNbM5gHzyVRUqWhmNj/v6SXAE9nHsbxeADO7CPhb4BJ3P5D3q9he8xhif80V0VM6Hu7+qJndBDxGpst/tbsPhNysUviymS0EBsl823gVxPp6Ab5F5tume7M94j+4+1VxvmYzewvwv4FmMiXMHnL318f5mnN0m4mIREpFD99EJH4USiISKQolEYkUhZKIRIpCSUQiRaEkIpGiUBKRSPn/MQoOtiy4aqQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "potential_interested_area_gpd[potential_interested_area_gpd['id'] == 'Wandel_Sea'].plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9bcdfadc-8f87-4df1-8fe5-4e6f495f16cd",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "8e791be3-751f-4858-bdc1-35c8df4dd8ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Spark schema for the areas of interest: a string `id` plus a Sedona geometry column.\n",
    "# Both fields are declared non-nullable.\n",
    "schema = StructType(\n",
    "    fields=[\n",
    "        StructField(name=\"id\", dataType=StringType(), nullable=False),\n",
    "        StructField(name=\"geometry\", dataType=GeometryType(), nullable=False),\n",
    "    ]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "4143af10-89c0-4320-89d7-47beb8dbf44d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the Sedona DataFrame from the selected area of interest only (the row whose id == Paoi).\n",
    "# NOTE(review): an empty Paoi presumably matches no row here -- confirm how the\n",
    "# leave-blank case is meant to be handled.\n",
    "\n",
    "potential_interested_area_spark = spark.createDataFrame(\n",
    "    potential_interested_area_gpd.loc[potential_interested_area_gpd['id'] == Paoi],\n",
    "    schema,\n",
    "    verifySchema=False,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "93d6b223-00e9-4a1d-bf8c-d16d0f345587",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------------+--------------------+\n",
      "|          id|            geometry|\n",
      "+------------+--------------------+\n",
      "|  Wandel_Sea|POLYGON ((-29.972...|\n",
      "|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "+------------+--------------------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "potential_interested_area_spark.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "e51aa010-e326-4d55-83d1-1ed7bb226c75",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give the AOI columns distinct names so they do not clash with the Sentinel table's\n",
    "# own `geometry` and `id` columns when the two tables are joined.\n",
    "potential_interested_area_spark = (\n",
    "    potential_interested_area_spark\n",
    "    .withColumnRenamed('geometry', 'PIA_geo')\n",
    "    .withColumnRenamed('id', 'PIA_name')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "0c74b534-5354-4b62-9dcb-b20303d6f872",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------+--------------------+-------------+----------+----------+------------+--------------------+\n",
      "|            geometry|      id|                uuid|         date|Instrument| Satellite|    PIA_name|             PIA_geo|\n",
      "+--------------------+--------+--------------------+-------------+----------+----------+------------+--------------------+\n",
      "|MULTIPOLYGON (((-...|31123198|51b7077a-2abb-48b...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123195|25720886-c069-438...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123193|f99faa69-5c7c-44a...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123191|2ee9aa4a-fcec-445...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123169|90e071da-a0b2-4bf...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123167|8a973fca-cec6-406...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123166|a510c6ae-5607-4f4...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123163|5c82c50f-54e5-4fd...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123159|0cfdf23d-6a78-44f...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123158|8d480b88-96c4-419...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123140|d228608a-b4dd-4a0...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123139|d8a602ae-6c0b-4f4...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123148|37133798-c6cf-4cc...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123137|d3627f48-1a2d-487...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123132|b18b8a14-d2a9-451...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123144|ad0dea0b-3314-489...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123143|a2c2ef3c-7df8-45b...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123130|5c54f46f-e761-438...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123123|b1c65e9a-0a1e-40f...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "|MULTIPOLYGON (((-...|31123109|fef46f4b-1d14-4ae...|2020-06-30T22|       MSI|Sentinel-2|Beaufort_Sea|POLYGON ((-156.95...|\n",
      "+--------------------+--------+--------------------+-------------+----------+----------+------------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Keep only the Sentinel records whose geometry intersects the selected area of interest.\n",
    "# NOTE: this rebinds the Python variable `sentinel_sedona`; the temp view of the same\n",
    "# name that the query reads from is not changed by this cell.\n",
    "potential_interested_area_spark.createOrReplaceTempView(\"potential_interested_area_spark\")\n",
    "\n",
    "# Each literal ends with a space so the concatenated statement keeps its keywords apart\n",
    "# (the previous form produced `SELECT *FROM ...`).\n",
    "sentinel_sedona = spark.sql(\n",
    "    'SELECT * '\n",
    "    'FROM sentinel_sedona, potential_interested_area_spark '\n",
    "    'WHERE ST_Intersects(sentinel_sedona.geometry, potential_interested_area_spark.PIA_geo)'\n",
    ")\n",
    "\n",
    "sentinel_sedona.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "welcome-presence",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "waiting-timothy",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "individual-turner",
   "metadata": {},
   "source": [
    "## Fourth, spatio-temporal join Sentinel dataset and ICE-Sat 2 dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "mighty-malawi",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3.1 Join the two Sedona DataFrames on their timestamps (currently at hour granularity), allowing for the configured time delay"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "material-premiere",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Temporal window for each Sentinel record: its timestamp +/- hour_delay hours,\n",
    "# expressed in Unix seconds (3600 = seconds per hour).\n",
    "sentinel_sedona = (\n",
    "    sentinel_sedona\n",
    "    .withColumn(\n",
    "        'sentinel_max_timestamp',\n",
    "        F.unix_timestamp(\"Sentinel_date_date_type\") + hour_delay * 3600,\n",
    "    )\n",
    "    .withColumn(\n",
    "        'sentinel_min_timestamp',\n",
    "        F.unix_timestamp(\"Sentinel_date_date_type\") - hour_delay * 3600,\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "color-riverside",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------------------+-----------------------+----------------------+----------------------+\n",
      "|            geometry|            s2_index|Sentinel_date_date_type|sentinel_max_timestamp|sentinel_min_timestamp|\n",
      "+--------------------+--------------------+-----------------------+----------------------+----------------------+\n",
      "|POLYGON ((166.768...|20191231T235959_2...|    2020-01-01 00:01:29|            1577876489|            1577833289|\n",
      "|POLYGON ((167.185...|20191231T235959_2...|    2020-01-01 00:01:20|            1577876480|            1577833280|\n",
      "|POLYGON ((167.144...|20191231T235959_2...|    2020-01-01 00:01:05|            1577876465|            1577833265|\n",
      "|POLYGON ((169.663...|20191231T235959_2...|    2020-01-01 00:01:27|            1577876487|            1577833287|\n",
      "|POLYGON ((167.573...|20191231T235959_2...|    2020-01-01 00:00:51|            1577876451|            1577833251|\n",
      "|POLYGON ((168.518...|20191231T235959_2...|    2020-01-01 00:00:38|            1577876438|            1577833238|\n",
      "|POLYGON ((169.588...|20191231T235959_2...|    2020-01-01 00:01:17|            1577876477|            1577833277|\n",
      "|POLYGON ((169.555...|20191231T235959_2...|    2020-01-01 00:01:02|            1577876462|            1577833262|\n",
      "|POLYGON ((169.641...|20191231T235959_2...|    2020-01-01 00:01:27|            1577876487|            1577833287|\n",
      "|POLYGON ((169.471...|20191231T235959_2...|    2020-01-01 00:01:12|            1577876472|            1577833272|\n",
      "|POLYGON ((169.437...|20191231T235959_2...|    2020-01-01 00:00:58|            1577876458|            1577833258|\n",
      "|POLYGON ((170.575...|20191231T235959_2...|    2020-01-01 00:01:30|            1577876490|            1577833290|\n",
      "|POLYGON ((169.494...|20191231T235959_2...|    2020-01-01 00:01:25|            1577876485|            1577833285|\n",
      "|POLYGON ((171.457...|20191231T235959_2...|    2020-01-01 00:01:07|            1577876467|            1577833267|\n",
      "|POLYGON ((171.989...|20191231T235959_2...|    2020-01-01 00:00:53|            1577876453|            1577833253|\n",
      "|POLYGON ((170.999...|20191231T235959_2...|    2020-01-01 00:01:19|            1577876479|            1577833279|\n",
      "|POLYGON ((169.559...|20191231T235959_2...|    2020-01-01 00:00:48|            1577876448|            1577833248|\n",
      "|POLYGON ((169.524...|20191231T235959_2...|    2020-01-01 00:00:34|            1577876434|            1577833234|\n",
      "|POLYGON ((168.494...|20191231T235959_2...|    2020-01-01 00:00:19|            1577876419|            1577833219|\n",
      "|POLYGON ((169.425...|20191231T235959_2...|    2020-01-01 00:00:08|            1577876408|            1577833208|\n",
      "+--------------------+--------------------+-----------------------+----------------------+----------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sentinel_sedona.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "preceding-correction",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- geometry: geometry (nullable = false)\n",
      " |-- s2_index: string (nullable = true)\n",
      " |-- Sentinel_date_date_type: timestamp (nullable = true)\n",
      " |-- sentinel_max_timestamp: long (nullable = true)\n",
      " |-- sentinel_min_timestamp: long (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sentinel_sedona.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "heated-plastic",
   "metadata": {},
   "outputs": [],
   "source": [
    "# to gpd dataframe for baseline \n",
    "# sentinel_sedona_gpd = gpd.GeoDataFrame(sentinel_sedona.toPandas(), geometry='geometry')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "distant-modeling",
   "metadata": {},
   "outputs": [],
   "source": [
    "# sentinel_sedona_gpd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "removable-investor",
   "metadata": {},
   "outputs": [],
   "source": [
    "# sentinel_sedona_gpd['Sentinel_date_unix'] = sentinel_sedona_gpd['Sentinel_date_date_type'].astype('int64') // 10**9\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "quality-negotiation",
   "metadata": {},
   "outputs": [],
   "source": [
    "# sentinel_sedona_gpd['S2_unix'] = sentinel_sedona_gpd['Sentinel_date_unix']\n",
    "# sentinel_sedona_gpd['S2_time_max'] = sentinel_sedona_gpd['sentinel_max_timestamp']\n",
    "# sentinel_sedona_gpd['S2_time_min'] = sentinel_sedona_gpd['sentinel_min_timestamp']\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "fresh-container",
   "metadata": {},
   "outputs": [],
   "source": [
    "# gpd.GeoDataFrame(sentinel_sedona_gpd[['s2_index', 'S2_unix', 'S2_time_max', 'S2_time_min', 'geometry']]).to_file('sentinel_6_month.shp')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "beneficial-commodity",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- orbit: geometry (nullable = false)\n",
      " |-- day_key: integer (nullable = true)\n",
      " |-- day_cycle: integer (nullable = true)\n",
      " |-- date_date_type: timestamp (nullable = true)\n",
      " |-- day_exact_time: timestamp (nullable = true)\n",
      " |-- day_hour: integer (nullable = true)\n",
      " |-- Description: string (nullable = true)\n",
      " |-- is_timestamp: long (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "is2_df.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "dietary-canberra",
   "metadata": {},
   "outputs": [],
   "source": [
    "# is2_df_pd = is2_df.toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "flying-continuity",
   "metadata": {},
   "outputs": [],
   "source": [
    "# is2_df_pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "assisted-paris",
   "metadata": {},
   "outputs": [],
   "source": [
    "# gpd.GeoDataFrame(is2_df_pd[['orbit', 'Description', 'is_timestamp']], geometry='orbit').to_file('is2_6month.shp')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "meaningful-executive",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "polar-street",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Expose the Sentinel-2 frame to Spark SQL under the view name used by the query below.\n",
    "# NOTE(review): the query also reads a view named is2_df; its registration is not in this cell - confirm it exists.\n",
    "sentinel_sedona.createOrReplaceTempView(\"sentinel_sedona\")\n",
    "\n",
    "# Pure spatial join: pair each Sentinel-2 geometry with every orbit geometry it intersects.\n",
    "spatial_join_query = \"SELECT * FROM sentinel_sedona, is2_df WHERE ST_Intersects(sentinel_sedona.geometry, is2_df.orbit)\"\n",
    "test_spatial_join = spark.sql(spatial_join_query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "mounted-graduate",
   "metadata": {},
   "outputs": [],
   "source": [
    "# import time\n",
    "# start_spatial_time = time.time()\n",
    "# test_spatial_join.select('uuid', 'Description', 'is_timestamp').write.format('com.databricks.spark.csv')\\\n",
    "#         .mode(\"overwrite\")\\\n",
    "#         .option('header', True)\\\n",
    "#         .save('sentinel_is2_intersect_pure_spatial')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "defined-strip",
   "metadata": {},
   "outputs": [],
   "source": [
    "# with open('speed_pure_spatial_join_month.txt', 'w') as f:\n",
    "#     f.write('pure spatial join time:')\n",
    "#     f.write(str(time.time() - start_spatial_time))\n",
    "#     f.write('\\n')\n",
    "#     f.write('number of spatial intersections:')\n",
    "#     f.write(str(test_spatial_join.count()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "matched-bible",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- geometry: geometry (nullable = false)\n",
      " |-- s2_index: string (nullable = true)\n",
      " |-- Sentinel_date_date_type: timestamp (nullable = true)\n",
      " |-- sentinel_max_timestamp: long (nullable = true)\n",
      " |-- sentinel_min_timestamp: long (nullable = true)\n",
      " |-- orbit: geometry (nullable = false)\n",
      " |-- day_key: integer (nullable = true)\n",
      " |-- day_cycle: integer (nullable = true)\n",
      " |-- date_date_type: timestamp (nullable = true)\n",
      " |-- day_exact_time: timestamp (nullable = true)\n",
      " |-- day_hour: integer (nullable = true)\n",
      " |-- Description: string (nullable = true)\n",
      " |-- is_timestamp: long (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "test_spatial_join.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "linear-royal",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_spatial_join = test_spatial_join.cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "serious-prime",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Temporal filter on the spatial matches: keep rows whose is_timestamp falls inside the inclusive\n",
    "# [sentinel_min_timestamp, sentinel_max_timestamp] window carried by the Sentinel-2 side.\n",
    "is_within_s2_window = F.col('is_timestamp').between(\n",
    "    F.col('sentinel_min_timestamp'),\n",
    "    F.col('sentinel_max_timestamp'),\n",
    ")\n",
    "test_ST_join = test_spatial_join.filter(is_within_s2_window)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "august-scanning",
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "# Timestamp taken immediately before the export is launched, so elapsed time can be derived afterwards\n",
    "# as time.time() - start_temporal_time.\n",
    "start_temporal_time = time.time()\n",
    "\n",
    "# Export the spatio-temporal matches (s2_index, Description, is_timestamp) as CSV with a header row,\n",
    "# overwriting any previous output at the target path. Uses Spark's built-in 'csv' data source instead of\n",
    "# the legacy 'com.databricks.spark.csv' alias from the Spark 1.x external package.\n",
    "(\n",
    "    test_ST_join.select('s2_index', 'Description', 'is_timestamp')\n",
    "    .write.format('csv')\n",
    "    .mode(\"overwrite\")\n",
    "    .option('header', True)\n",
    "    .save('AOI_sentinel_is2_intersect_spatial_temporal')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "enormous-founder",
   "metadata": {},
   "outputs": [],
   "source": [
    "# with open('ISAT_join_{}_hours_delay_S2I2.txt'.format(str(hour_delay)), 'w') as f:\n",
    "#     f.write('ISAT time in total:')\n",
    "#     f.write(str(time.time() - start_temporal_time))\n",
    "#     f.write('\\n')\n",
    "#     f.write('number of spatial intersections:')\n",
    "#     f.write(str(test_ST_join.count()))\n",
    "#     f.write('\\n')\n",
    "#     f.write('from and to date')\n",
    "#     f.write(start_date)\n",
    "#     f.write('\\n')\n",
    "#     f.write(end_date)\n",
    "#     f.write('\\n')\n",
    "#     f.write('ICEsat2 data count:')\n",
    "#     f.write(str(is2_df_raw.count()))\n",
    "#     f.write('\\n')\n",
    "\n",
    "#     f.write('Sentinel 2 data count:')\n",
    "#     f.write(str(sentinel_sedona.count()))\n",
    "    \n",
    "    \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "broad-portrait",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "activated-panel",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "specified-partnership",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
